### Part I. CI functions
## 1-1 [CI] Base functions: 1.1.1 CI.base, 1.1.2 Variance estimators
# Library ##############################################################################
library(rootSolve) # for Newton-Raphson Method (0.2.4)
library(sn) # for Owen's T-function in Double Gaussian Method (0.2.15)
# CI methods : ref to 2-3.[SIM] Simulator, 3.evaluation
CI.methods = c("Bm", "HM1", "HM2", "NS1", "NW", "NS2", "Mee", "DL", "RG", "CM", "DB", "DG", "RJ")
### Descriptions
# Bm : Bamber
# HM1 : Hanley-McNeil 1 (Wald, nonparametric var)
# HM2 : Hanley-McNeil 2 (Wald, parametric var:exponential)
# NS1 : Newcombe Score1 (Score + HM2)
# NW : Newcombe Wald (HM2 + balanced design)
# NS2 : Newcombe Score 2 (NS1 + balanced design)
# Mee : Mee (Score)
# DL : DeLong (Wald)
# RG : Reiser-Guttman (Parametric: Gaussian)
# DB : Double beta (Parametric: double beta)
# DG : Double Gaussian (Parametric: double Gaussian)
# CM : Cortes-Mohri (Wald, Combinatoric variance)
### "CP","WS" => excluded from this study: not AUC specific
########################################################################################
## 1.1.1 Basic internal functions ######################################################
# CI.base, AUC.classic, AUC
CI.base <- function(mu.hat, Var, alpha, dist=qnorm,...) {
return(mu.hat + c(-1,1) * dist(1-alpha/2,...)*sqrt(Var))
}
# recommended when the biomarker is discrete. (modified June 2016)
AUC <- function(x, y, n.x=length(x), n.y=length(y), data=NA, disease="disease", marker="marker") {
if (!is.na(data[1])[1]) {
xy = data2xy(data, disease=disease, marker=marker)
x = xy$x
y = xy$y
n.x = length(x); n.y = length(y)
}
A <- 0; for (i in 1:n.x) { A = A + ( sum(y > x[i]) + sum(y==x[i])/2 ) }
return(A = A/ n.x / n.y)
}
# new AUC function added with much improved efficiency (but not precise when the biomarker is discrete.)
# When discrete, use AUC.classic. discrete is TRUE by default. use FALSE when simulating/finding parameters.
AUC.2 <- function(x, y, n.x=length(x), n.y=length(y), data=NA, disease="disease", marker="marker", discrete=TRUE) {
if (discrete) {
result = AUC(x=x, y=y, n.x=n.x, n.y=n.y, data=data, disease=disease, marker=marker)
return(result)
} else {
if (!is.na(data[1])[1]) {
xy = data2xy(data, disease=disease, marker=marker)
x = xy$x
y = xy$y
}
predictions = c(x,y)
labels = c(rep(0,n.x), rep(1,n.y))
pred.order = order(predictions, decreasing = TRUE)
predictions.sorted = predictions[pred.order]
tp = cumsum(labels[pred.order] == 1)
fp = cumsum(labels[pred.order] == 0)
dups <- rev(duplicated(rev(predictions.sorted)))
tp <- c(0, tp[!dups])
fp <- c(0, fp[!dups])
#cutoffs <- c(Inf, predictions.sorted[!dups])
# excerpted until here from <ROCR> prediction function
tpr <- tp[-1]/max(tp)
d.fp <- fp[-1] - fp[-length(fp)]
d.fpr <- d.fp/max(fp)
return(sum(tpr*d.fpr))
}
}
# data converter: from dataframe to x, y vectors
# (dropped missing disease status. June 2016)
data2xy <- function(data, disease="disease", marker="marker") {
data = data[!is.na(data[,disease]),]
x = data[data[,disease]==0, marker]
y = data[data[,disease]==1, marker]
return(list(x=x,y=y))
}
# logit, expit
logit <- function(data, LT=TRUE) {
if (LT) {return(log(data/(1-data)))} else {return(data)}
}
expit <- function(data, LT=TRUE) {
if (LT) {return((1+exp(data)^-1)^-1)} else {return(data)}
}
## 1.1.2 variance estimators ###########################################################
# Q.stat for HM and its derivatives
# (added the half of the tie cases in June 2016)
Q.stat <- function(x, y, n.x=length(x), n.y=length(y)) {
Q2 <- Q1 <- 0
for (i in 1:n.x) {Q1 = Q1 + ( sum(y > x[i]) + sum(y == x[i])/2 )^2}; Q1 = Q1 /(n.x * n.y^2)
for (j in 1:n.y) {Q2 = Q2 + ( sum(y[j] > x) + sum(y[j] == x)/2 )^2}; Q2 = Q2 /(n.x^2 * n.y)
return(data.frame(Q1 = Q1, Q2 = Q2))
}
# p(Y=X)
p.XY <- function(x, y, n.x=length(x), n.y=length(y)) {
pXY <- 0
for (i in 1:n.x) {pXY = pXY + sum(y == x[i])}; pXY = pXY /(n.x * n.y)
return(pXY)
}
V.HM <- function(AUC, n.x, n.y, Q1 = AUC/(2-AUC), Q2=2* AUC^2 /(AUC + 1), pXY, LT=FALSE, NC=FALSE, sample.var=TRUE, ...) {
# default: HME method
N = (n.x+n.y)/2
if (LT) {div.LT = (AUC*(1-AUC))^2} else {div.LT=1}
nxny = ifelse(sample.var, (n.x-1)*(n.y-1), n.x*n.y) # sample.var: dividing by (nx-1)(ny-1) is unbiased
if (NC==FALSE) {V = (AUC * (1-AUC) - pXY/4 + (n.y -1)*(Q1 - AUC^2) + (n.x-1)*(Q2 - AUC^2)) /nxny /div.LT}
else {V = AUC*(1-AUC)/nxny * ( (2*N-1) - (3*N-3) / ((2-AUC)*(AUC+1)) )/div.LT}
return(V)
}
### for score method it should be populatoin variance not sample variance (sample.var=FALSE).
HMS.equation = function(AUC, AUC.hat, n.x, n.y, pXY, alpha, MI=NA, LT=FALSE, NC=FALSE, sample.var=FALSE, score.MI = "fixed.r") {
W = V.HM(AUC, n.x, n.y, pXY=pXY, LT=LT, NC=NC, sample.var=sample.var)
if (score.MI == "fixed.r") {
## 1. fixed r
W.hat = V.HM(AUC.hat, n.x, n.y, pXY=pXY, LT=LT, NC=NC, sample.var=sample.var)
Rubin = Rubin(W=W.hat, MI=MI, alpha=alpha, print.r=TRUE)
z.val = Rubin$z.val # nu is fixed (since r is fixed)
r = Rubin$r
return((logit(AUC,LT=LT) - min(10,logit(AUC.hat,LT=LT)))^2 - z.val^2*W*(1+r) )
} else if (score.MI == "fixed.B") {
## 2. fixed B hat
Rubin = Rubin(W=W, MI=MI, alpha=alpha)
V = Rubin$v.final # W + (1/m)*B for MI
z.val = Rubin$z.val # t value for MI
return((logit(AUC,LT=LT) - min(10,logit(AUC.hat,LT=LT)))^2 - z.val^2*V)
}
}
HM.coef = function(AUC.hat, n.x, n.y, pXY, NC=FALSE, alpha, r=0){ #coefficients of expanded equation to be solved by polyroot
if (NC==FALSE) {n.x0 = n.x; n.y0 = n.y}
else {n.x0 <- n.y0 <- (n.x+n.y)/2}
z = qnorm(1-alpha/2)
t4 = (n.x0 + n.y0 -1)*(1+r)*z^2 + 1*n.x*n.y
t3 = (-3*n.x0 -n.y0 +2)*(1+r)*z^2 +(-2 *AUC.hat -1)*n.x*n.y
t2 = (2*n.x0 -n.y0 -2 + pXY/4)*(1+r)*z^2 + (AUC.hat^2 + 2*AUC.hat -2)*n.x*n.y
t1 = (n.y0 +1 - pXY/4)*(1+r)*z^2 +(-AUC.hat^2 + 4*AUC.hat)*n.x*n.y
t0 = (-pXY/2)*(1+r)*z^2 -2*AUC.hat^2*n.x*n.y
return(c(t0, t1, t2, t3, t4))
}
V.CM <- function(AUC, n.x, n.y, LT=FALSE){
if (is.na(AUC)) {return(V=NA)} else {
z1 = function (w, n.x, n.y, k) {1-0.5*(w/n.y+(k-w)/n.x)}
z2 = function (w, n.x, n.y, k)
(n.x*w^2+n.y*(k-w)^2+n.x*(n.x+1)*w+n.y*(n.y+1)*(k-w)-2*w*(k-w)*(n.x+n.y+1))/(12*n.x^2*n.y^2)
F.base <- function (w, n.x, n.y, k) {choose(n.x-k+2*w,w)*choose(n.y+k-2*w,k-w)}
k=round((n.x+n.y)*(1-AUC))
F1=F2=F3=F.den=0
for (w in 0:k){
F1 = F1 + F.base(w, n.x, n.y, k)*z1(w, n.x, n.y, k)^2
F.den = F.den + F.base(w, n.x, n.y, k)
F2 = F2 + F.base(w, n.x, n.y, k)*z1(w, n.x, n.y, k)
F3 = F3 + F.base(w, n.x, n.y, k)*z2(w, n.x, n.y, k)
}
if (LT) {div.LT = (AUC*(1-AUC))^2} else {div.LT=1}
return(V = (F1/F.den - F2^2/F.den^2 + F3/F.den)/div.LT)
}
}
probit.RG <- function(x, y, n.x=length(x), n.y=length(y)){
mu.x = mean(x); mu.y = mean(y)
sig.x = sd(x); sig.y = sd(y); s2 = sig.x^2 + sig.y^2
d = (mu.y-mu.x)/sqrt(s2)
f = s2^2/((sig.x^4/(n.x-1))+(sig.y^4/(n.y-1)))
M = s2/((sig.x^2/n.x)+(sig.y^2/n.y))
V = 1/M + d^2/(2*f)
return(data.frame(delta=d, V=V))
}
V.Bm.old <- function(x, y, n.x=length(x), n.y=length(y), AUC.hat=AUC(x,y,n.x,n.y), LT=FALSE, sample.var=TRUE){
b.yyx <- b.xxy <- p.xy <- 0
for (i in 1:n.x) {b.yyx = b.yyx + sum(y < x[i])^2 + sum(x[i] < y)^2 -2*sum(y < x[i])*sum(x[i] < y) }
b.yyx = b.yyx/(n.x*n.y^2)
for (j in 1:n.y) {b.xxy = b.xxy + sum(x < y[j])^2 + sum(y[j] < x)^2 -2*sum(x < y[j])*sum(y[j] < x) }
b.xxy = b.xxy/(n.y*n.x^2)
for (i in 1:n.x) { p.xy = p.xy + sum(y != x[i] ) }
p.xy = p.xy /(n.x*n.y)
nxny = ifelse(sample.var, (n.x-1)*(n.y-1), n.x*n.y) # sample.var: dividing by (nx-1)(ny-1) is unbiased
if (LT) {div.LT = (AUC.hat*(1-AUC.hat))^2} else {div.LT=1}
return(1/(4*nxny)*(p.xy + (n.y -1)*b.xxy + (n.x -1)*b.yyx - 4*(n.y + n.x -1)*(AUC.hat -0.5)^2)/div.LT)
}
## replaced V.Bm.old with new one to reflect "calculating b-stats without replacement"
V.Bm <- function(x, y, n.x=length(x), n.y=length(y), AUC.hat=AUC(x,y,n.x,n.y), LT=FALSE, sample.var=TRUE){
b.yyx <- b.xxy <- p.xy <- 0
for (i in 1:n.x) {b.yyx = b.yyx + sum(y < x[i])*{sum(y < x[i])-1} + sum(x[i] < y)*{sum(x[i] < y)-1} -2*sum(y < x[i])*sum(x[i] < y) }
b.yyx = b.yyx/(n.x*n.y*(n.y-1))
for (j in 1:n.y) {b.xxy = b.xxy + sum(x < y[j])*{sum(x < y[j])-1} + sum(y[j] < x)*{sum(y[j] < x)-1} -2*sum(x < y[j])*sum(y[j] < x) }
b.xxy = b.xxy/(n.y*n.x*(n.x-1))
for (i in 1:n.x) { p.xy = p.xy + sum(y != x[i] ) }
p.xy = p.xy /(n.x*n.y)
nxny = ifelse(sample.var, (n.x-1)*(n.y-1), n.x*n.y) # sample.var: dividing by (nx-1)(ny-1) is unbiased
if (LT) {div.LT = (AUC.hat*(1-AUC.hat))^2} else {div.LT=1}
return(1/(4*nxny)*(p.xy + (n.y -1)*b.xxy + (n.x -1)*b.yyx - 4*(n.y + n.x -1)*(AUC.hat -0.5)^2)/div.LT)
}
# p.stat for Halperin Mee method
p.stat <- function(x, y, n.x=length(x), n.y=length(y)) {
Q = Q.stat(x, y, n.x, n.y); Q1 <- Q$Q1; Q2 <- Q$Q2
y.mat <- matrix(rep(y,n.x),ncol=n.x)
x.mat <- t(matrix(rep(x,n.y),nrow=n.x))
U.sq <- sum(((y.mat > x.mat)+(y.mat == x.mat)/2)^2)
p2 <- (n.x*n.y^2*Q1 - U.sq)/(n.x*n.y*(n.y-1))
p1 <- (n.y*n.x^2*Q2 - U.sq)/(n.y*n.x*(n.x-1))
return(data.frame(p1=p1,p2=p2))
}
##### after validation of new one, replace old one.
Mee.stat.old <- function(x, y, n.x=length(x), n.y=length(y)) {
ranks = rank(c(x,y))
rank.x <- ranks[1:n.x]
rank.y <- ranks[-1:-n.x]
AUC.hat <- AUC(x, y, n.x, n.y)
increment = ifelse(AUC.hat < 0.5, +0.5, -0.5)
while (min(AUC.hat,1-AUC.hat)*sqrt(n.x*n.y) < 0.5){
rank.y = rank.y + increment
x <- rank.x ; y <- rank.y
AUC.hat = AUC(x, y, n.x, n.y)
}
p = p.stat(x, y, n.x, n.y)
rho.hat = (p-AUC.hat^2)/(AUC.hat-AUC.hat^2)
N.J.hat = n.x*n.y/(((n.x-1)*rho.hat$p1 +1)/(1-1/n.y) + ((n.y-1)*rho.hat$p2 + 1 )/(1-1/n.x) )
return(data.frame(p1=p$p1, p2=p$p2, AUC.hat=AUC.hat, N.J.hat=N.J.hat))
}
### new one. replace the old one after validation!!
Mee.stat <- function(x, y, n.x=length(x), n.y=length(y)) {
AUC.hat.0 <- AUC.hat <- AUC(x, y, n.x, n.y)
if (is.na(AUC.hat)) {
return(data.frame(AUC.hat=NA, p1=NA, p2=NA, N.J.hat=NA))
} else if (n.x*n.y==0) {
return(data.frame(AUC.hat=AUC.hat, p1=NA, p2=NA, N.J.hat=NA))
} else {
k <- 0
increment = ifelse(AUC.hat < 0.5, +0.5, -0.5)
while (min(AUC.hat,1-AUC.hat)*sqrt(n.x*n.y) < 0.5 & k <= 100){
# print(c(AUC.hat, k))
k <- k+1
y = y + increment
AUC.hat = AUC(x, y, n.x, n.y)
}
p = p.stat(x, y, n.x, n.y)
rho.hat = (p-AUC.hat^2)/(AUC.hat-AUC.hat^2)
N.J.hat = n.x*n.y/(((n.x-1)*rho.hat$p1 +1)/(1-1/n.y) + ((n.y-1)*rho.hat$p2 + 1 )/(1-1/n.x) )
return(data.frame(AUC.hat=AUC.hat.0, p1=p$p1, p2=p$p2, N.J.hat=N.J.hat))
}
}
V.Mee.old <- function(x, y, n.x=length(x), n.y=length(y), AUC=AUC(x,y,n.x,n.y), LT=FALSE, N.J.hat=NA, sample.var=TRUE) {
# sample.var is not relevant. for compatibility purpose. (V.Mee is upward biased)
if (is.na(N.J.hat)) {N.J.hat = Mee.stat(x,y,n.x,n.y)$N.J.hat}
V = AUC*(1-AUC) / N.J.hat
if (LT) {div.LT = (AUC*(1-AUC))^2} else {div.LT=1}
return( V/div.LT )
}
Mee.equation.old <- function(AUC, AUC.hat, x, y, n.x, n.y, alpha, MI=NA, LT=FALSE, sample.var=TRUE, score.MI = "fixed.r"){
W = V.Mee(x, y, n.x, n.y, AUC, LT=LT, sample.var=sample.var)
if (score.MI == "fixed.r") {
## 1. fixed r
W.hat = V.Mee(x, y, n.x, n.y, AUC.hat, LT=LT, sample.var=sample.var)
Rubin = Rubin(W=W.hat, MI=MI, alpha=alpha, print.r=TRUE)
z.val = Rubin$z.val # nu is fixed (since r is fixed)
r = Rubin$r
return((logit(AUC,LT=LT) - logit(AUC.hat,LT=LT))^2 - z.val^2*W*(1+r) )
} else if (score.MI == "fixed.B") {
Rubin = Rubin(W=W, MI=MI, alpha=alpha)
V = Rubin$v.final # W + (1/m)*B for MI
z.val = Rubin$z.val # t value for MI
return((logit(AUC,LT=LT) - logit(AUC.hat,LT=LT))^2 - z.val^2*V)
}
}
V.Mee <- function(AUC, N.J.hat, LT=FALSE, sample.var=TRUE) {
# sample.var is not relevant. for compatibility purpose. (V.Mee is upward biased)
V = AUC*(1-AUC) / N.J.hat
if (LT) {div.LT = (AUC*(1-AUC))^2} else {div.LT=1}
return( V/div.LT )
}
Mee.equation <- function(AUC, AUC.hat, N.J.hat, alpha, MI=NA, LT=FALSE, sample.var=TRUE, score.MI = "fixed.r"){
W = V.Mee(AUC, N.J.hat, LT=LT, sample.var=sample.var)
if (score.MI == "fixed.r") {
## 1. fixed r
W.hat = V.Mee(AUC.hat, N.J.hat, LT=LT, sample.var=sample.var)
Rubin = Rubin(W=W.hat, MI=MI, alpha=alpha, print.r=TRUE)
z.val = Rubin$z.val # nu is fixed (since r is fixed)
r = Rubin$r
return((logit(AUC,LT=LT) - logit(AUC.hat,LT=LT))^2 - z.val^2*W*(1+r) )
} else if (score.MI == "fixed.B") {
Rubin = Rubin(W=W, N.J.hat, MI=MI, alpha=alpha)
V = Rubin$v.final # W + (1/m)*B for MI
z.val = Rubin$z.val # t value for MI
return((logit(AUC,LT=LT) - logit(AUC.hat,LT=LT))^2 - z.val^2*V)
}
}
Mee.coef = function(AUC.hat, N.J.hat, alpha, r=0){ #coefficients of expanded equation to be solved by to polyroot
z = qnorm(1-alpha/2)
t2 = (1+r)*z^2 + N.J.hat
t1 = -(1+r)*z^2 + -2*AUC.hat*N.J.hat
t0 = AUC.hat^2 * N.J.hat
return(c(t0, t1, t2))
}
V.DB <- function(alp, n.x, n.y, LT=FALSE) {
R1 = gamma(alp + 1)^2 / gamma(2*alp + 1)
R2 = gamma(2*alp +1)*gamma(alp +1)/gamma(3*alp+1)
theta = 1 - R1
Q = 1 - 2*R1 + R2
if (LT) {div.LT = (theta*(1-theta))^2} else {div.LT=1}
return(V = (theta*(1-theta) + (n.x+n.y-2)*(Q-theta^2))/n.x/n.y/div.LT)
}
DB.equation <- function(alp, AUC.hat, n.x, n.y, alpha, MI=NA, LT=FALSE){
AUC = 1 - gamma(alp + 1)^2 / gamma(2*alp + 1)
W = V.DB(alp, n.x, n.y, LT=LT)
Rubin = Rubin(W=W, MI=MI, alpha=alpha)
V = Rubin$v.final # W + (1/m)*B for MI
z.val = Rubin$z.val # t value for MI
return(logit(AUC,LT=LT) + c(+1,-1)*z.val*sqrt(V) - logit(AUC.hat,LT=LT))
}
DB <- function(AUC.hat, n.x, n.y, alpha,...) {
start.1 <- min(3, max(-log(1-AUC.hat,2) - 1, 1))
start.2 <- min(7, -log(1-AUC.hat,2) + 1)
start <- c(start.1, start.2)
alp <- multiroot2(DB.equation, start, AUC.hat=AUC.hat, n.x=n.x, n.y=n.y, alpha=alpha,...)$root
return(1 - gamma(alp + 1)^2 / gamma(2*alp + 1))
}
V.DG <- function(delta, n.x, n.y, LT=FALSE){
theta = pnorm(delta/sqrt(2))
Owen = T.Owen(delta/sqrt(2), 1/sqrt(3))
if (LT) {div.LT = (theta*(1-theta))^2} else {div.LT=1}
return(V = ((n.x + n.y - 1)*theta*(1-theta) - 2*(n.x + n.y -2) * Owen ) / (n.x * n.y) /div.LT)
}
DG.equation <- function(delta, AUC.hat, n.x, n.y, alpha, MI=NA, LT=FALSE) {
W = V.DG(delta, n.x, n.y, LT=LT)
Rubin = Rubin(W=W, MI=MI, alpha=alpha)
V = Rubin$v.final # W + (1/m)*B for MI
z.val = Rubin$z.val # t value for MI
AUC = pnorm(delta/sqrt(2))
return(logit(AUC,LT=LT) + c(+1,-1)*z.val*sqrt(V) - logit(AUC.hat,LT=LT))
}
DG <- function(AUC.hat, n.x, n.y, alpha,...) {
delta <- multiroot2(DG.equation, c(0.5, 0.9), AUC.hat=AUC.hat, n.x=n.x, n.y=n.y, alpha=alpha,...)$root
return(pnorm(delta/sqrt(2)))
}
S.stat <- function(x, y, n.x=length(x), n.y=length(y), A = AUC(x, y, n.x, n.y), LT=FALSE) {
ranks = rank(c(x,y))
rank.x = ranks[1:n.x] ; rank.y = ranks[-1:-n.x] # ranks within both x and y
rank.i = rank(rank.x) ; rank.j = rank(rank.y) # ranks within each
S2.10 = ( sum((rank.x - rank.i)^2) - n.x*(mean(rank.x) - (n.x+1)/2)^2 ) / ((n.x-1)*n.y^2)
S2.01 = ( sum((rank.y - rank.j)^2) - n.y*(mean(rank.y) - (n.y+1)/2)^2 ) / ((n.y-1)*n.x^2)
if (LT) {div.LT = (A*(1-A))^2} else {div.LT=1}
return(S = sqrt((n.x * S2.01 + n.y * S2.10)/(n.x + n.y)/div.LT))
}
V.DeLong <- function(x, y, n.x=length(x), n.y=length(y), A = AUC(x, y, n.x, n.y), LT=FALSE) {
D2 <- D1 <- 0
for (i in 1:n.x) {D1 = D1 + ( sum(y > x[i]) / n.y - A )^2}; D1 = D1 /(n.x * (n.x - 1))
for (j in 1:n.y) {D2 = D2 + ( sum(y[j] > x) / n.x - A )^2}; D2 = D2 /(n.y * (n.y - 1))
if (LT) {div.LT = (A*(1-A))^2} else {div.LT=1}
return(V = (D1 + D2)/div.LT)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.